2 // GetMetadataForHTMLLog-Additions.m
3 // AdiumSpotlightImporter
5 // Created by Evan Schoenberg on 5/25/06.
8 #import "GetMetadataForHTMLLog-Additions.h"
11 * @brief These additions are all from AIUtilities
13 * The spotlight importer should include this file to get these specific additions.
14 * If the GetMetadataForHTMLLog class is used in a situation in which AIUtilities is linked in already, it is
15 * not necessary to include this implementation file.
@implementation NSScanner (AdiumSpotlightImporterAdditions)

/*!
 * @brief Scan an unsigned decimal integer starting at the current scan location
 *
 * Characters in the receiver's charactersToBeSkipped set are consumed first; they stay consumed even when
 * no number follows. A single leading '+' is accepted. Scanning stops at the first character that is not
 * an ASCII digit, and the scan location is left on that character.
 *
 * @param unsignedIntValue On success, receives the scanned value. Values too large for an unsigned int are
 *                         clamped to the largest representable value. May be NULL to scan without storing.
 * @result YES if at least one digit was scanned; NO otherwise.
 */
- (BOOL)scanUnsignedInt:(unsigned int *)unsignedIntValue
{
	//Skip characters if necessary. The skip set is cleared while skipping so that it is not applied twice.
	NSCharacterSet *skipSet = [self charactersToBeSkipped];
	if (skipSet) {
		[self setCharactersToBeSkipped:nil];
		[self scanCharactersFromSet:skipSet intoString:NULL];
		[self setCharactersToBeSkipped:skipSet];
	}

	//Read straight from the string: no temporary buffer to copy, and nothing to leak on the early returns.
	NSString *string = [self string];
	unsigned end = [string length];
	unsigned i = [self scanLocation]; //absolute index into string, not an offset from the starting location

	if ((i < end) && ([string characterAtIndex:i] == '+')) {
		++i;
	}

	//There must be at least one digit.
	if (i >= end) return NO;
	unichar c = [string characterAtIndex:i];
	if ((c < '0') || (c > '9')) return NO;

	const unsigned maxValue = ~0U;
	unsigned total = 0;
	BOOL overflowed = NO;

	for (; i < end; ++i) {
		c = [string characterAtIndex:i];
		if ((c < '0') || (c > '9')) break;

		unsigned digit = c - '0';
		if (overflowed || (total > (maxValue - digit) / 10)) {
			//(total * 10) + digit would wrap around; clamp, but keep consuming the remaining digits.
			overflowed = YES;
			total = maxValue;
		} else {
			total = (total * 10) + digit;
		}
	}

	[self setScanLocation:i];
	if (unsignedIntValue) *unsignedIntValue = total;

	return YES;
}

@end
61 @implementation NSString (AdiumSpotlightImporterAdditions)
/*!
 * @brief Split a UTF-32 code point into UTF-16 code units
 *
 * Code points below U+10000 fit in a single UTF-16 unit and need no surrogates. Code points from U+10000
 * through U+10FFFF are encoded by subtracting 0x10000 and spreading the remaining 20 bits over two
 * surrogates: the top 10 bits go into the high surrogate (0xD800-0xDBFF) and the bottom 10 bits into the
 * low surrogate (0xDC00-0xDFFF).
 *
 * @param in      The code point to convert. Values above U+10FFFF are not valid Unicode; for them only the
 *                low 20 bits of (in - 0x10000) are encoded.
 * @param outHigh If non-NULL, receives the high surrogate, or 0 when no surrogate pair is needed.
 * @param outLow  If non-NULL, receives the low surrogate, or the code point itself when no surrogate pair
 *                is needed.
 * @result YES if the code point was split into a surrogate pair; NO if it fit in a single UTF-16 unit.
 */
BOOL AIGetSurrogates(UTF32Char in, UTF16Char *outHigh, UTF16Char *outLow)
{
	enum {
		FirstSupplementaryCodePoint = 0x10000,
		SurrogatePayloadBits        = 10,
		SurrogatePayloadMask        = 0x3FF,  //0b0000 0011 1111 1111
		HighSurrogateBase           = 0xD800, //0b1101 1000 0000 0000
		LowSurrogateBase            = 0xDC00  //0b1101 1100 0000 0000
	};

	if (in < FirstSupplementaryCodePoint) {
		if (outHigh) *outHigh = 0;
		if (outLow)  *outLow  = (UTF16Char)in;
		return NO;
	}

	//The surrogates carry the offset from U+10000, not the raw code point.
	UTF32Char offset = in - FirstSupplementaryCodePoint;

	if (outHigh) {
		*outHigh = (UTF16Char)(HighSurrogateBase | ((offset >> SurrogatePayloadBits) & SurrogatePayloadMask));
	}
	if (outLow) {
		*outLow = (UTF16Char)(LowSurrogateBase | (offset & SurrogatePayloadMask));
	}

	return YES;
}
/*!
 * @brief Return a copy of this string with HTML character references replaced by the characters they name
 *
 * Handles decimal ("&#38;") and hexadecimal ("&#x26;") numeric references and the named entities quot, amp,
 * lt, gt and nbsp (matched case-insensitively). Unrecognized named entities, malformed numeric references
 * and numeric references beyond U+10FFFF are dropped from the result.
 *
 * @result An autoreleased string with the references decoded.
 */
- (NSString *)stringByUnescapingFromHTML
{
	if ([self length] == 0) return [[self copy] autorelease]; //avoids various RangeExceptions.

	static NSString *ampersand = @"&", *semicolon = @";";

	NSString *segment = nil, *entity = nil;
	NSScanner *scanner = [NSScanner scannerWithString:self];
	[scanner setCaseSensitive:YES];
	/* By default a scanner silently skips whitespace before every scan, which would eat the whitespace that
	 * follows an entity (or begins the string). Every character matters here, so skip nothing.
	 */
	[scanner setCharactersToBeSkipped:nil];
	unsigned myLength = [self length];
	NSMutableString *result = [NSMutableString string];

	do {
		//Copy the literal text up to the next ampersand, then step over the ampersand itself.
		if ([scanner scanUpToString:ampersand intoString:&segment] || [self characterAtIndex:[scanner scanLocation]] == '&') {
			if (segment) {
				[result appendString:segment];
				segment = nil;
			}
			if (![scanner isAtEnd]) {
				[scanner setScanLocation:[scanner scanLocation]+1];
			}
		}

		//Whatever lies between the ampersand and the next semicolon is the entity.
		if ([scanner scanUpToString:semicolon intoString:&entity]) {
			unsigned number = 0;

			if ([entity characterAtIndex:0] == '#') {
				BOOL appendIt = NO;

				//A bare "#" has no second character to inspect and no digits to scan.
				if ([entity length] > 1) {
					NSScanner *numScanner = [NSScanner scannerWithString:entity];
					[numScanner setCaseSensitive:YES];
					unichar secondCharacter = [entity characterAtIndex:1];

					if (secondCharacter == 'x' || secondCharacter == 'X') {
						//hexadecimal: "#x..." or "#X..."
						[numScanner setScanLocation:2];
						appendIt = [numScanner scanHexInt:&number];
					} else {
						//decimal: "#..."
						[numScanner setScanLocation:1];
						appendIt = [numScanner scanUnsignedInt:&number];
					}
				}

				//Unicode ends at U+10FFFF; nothing beyond it can be expressed in UTF-16.
				if (appendIt && (number <= 0x10FFFF)) {
					unichar chars[2] = { (unichar)number, 0xffff };
					CFIndex length = 1;

					if (number > 0xffff) {
						//split into surrogate pair
						AIGetSurrogates(number, &chars[0], &chars[1]);
						++length;
					}

					CFStringAppendCharacters((CFMutableStringRef)result, chars, length);
				}
			} else {
				//named entity. for now, we only support the five essential ones.
				static NSDictionary *entityNames = nil;
				if (entityNames == nil) {
					entityNames = [[NSDictionary alloc] initWithObjectsAndKeys:
						[NSNumber numberWithUnsignedInt:'"'], @"quot",
						[NSNumber numberWithUnsignedInt:'&'], @"amp",
						[NSNumber numberWithUnsignedInt:'<'], @"lt",
						[NSNumber numberWithUnsignedInt:'>'], @"gt",
						[NSNumber numberWithUnsignedInt:' '], @"nbsp",
						nil];
				}

				//An unknown name yields nil from the dictionary, and so 0 here.
				number = [[entityNames objectForKey:[entity lowercaseString]] unsignedIntValue];
				if (number) {
					[result appendFormat:@"%C", (unichar)number];
				}
			}

			//Step over the terminating semicolon.
			if (![scanner isAtEnd]) {
				[scanner setScanLocation:[scanner scanLocation]+1];
			}
		} //if ([scanner scanUpToString:semicolon intoString:&entity])
	} while ([scanner scanLocation] < myLength);
//	NSLog(@"unescaped %@\ninto %@", self, result);

	return result;
}
/*!
 * @brief Read a string from a file, assuming it to be UTF8
 *
 * If it can not be read as UTF8, it will be read as ASCII.
 *
 * @param path Path of the file to read.
 * @result The file's contents, or nil if the file could not be read. Every failure other than a missing
 *         file is logged.
 */
+ (NSString *)stringWithContentsOfUTF8File:(NSString *)path
{
	NSString *string = nil;

	if ((floor(kCFCoreFoundationVersionNumber) > kCFCoreFoundationVersionNumber10_3)) {
		NSError *error = nil;

		string = [NSString stringWithContentsOfFile:path
										   encoding:NSUTF8StringEncoding
											  error:&error];

		//Failure is signalled by the nil result; the error object is only meaningful once that has happened.
		if (!string) {
			BOOL handled = NO;

			if ([[error domain] isEqualToString:NSCocoaErrorDomain]) {
				if ([error code] == NSFileReadNoSuchFileError) {
					//File not found. Not worth logging.
					handled = YES;

				} else if ([error code] == NSFileReadInapplicableStringEncodingError) {
					/* Reason: File could not be opened using text encoding Unicode (UTF-8).
					 * Description: Text encoding Unicode (UTF-8) is not applicable.
					 *
					 * We couldn't read the file as UTF8. Fall back to reading it as ASCII.
					 */
					string = [NSString stringWithContentsOfFile:path
													   encoding:NSASCIIStringEncoding
														  error:NULL];

					//If that produced a string, we recovered reasonably successfully...
					handled = (string != nil);
				}
			}

			if (!handled) {
				NSLog(@"Error reading %@:\n%@; %@.",path,
					  [error localizedDescription], [error localizedFailureReason]);
			}
		}

	} else {
		//Systems without the NSError-based reading API: load the raw bytes and decode them ourselves.
		NSData *data = [NSData dataWithContentsOfFile:path];

		if (data) {
			string = [[[NSString alloc] initWithData:data
											encoding:NSUTF8StringEncoding] autorelease];
			if (!string) {
				string = [[[NSString alloc] initWithData:data
												encoding:NSASCIIStringEncoding] autorelease];
			}
		}

		if (!string) {
			NSLog(@"Error reading %@",path);
		}
	}

	return string;
}